---
title: "Won-nov-19"
format: html
editor: visual
---

## Figure 7 and Figure 8 (Won et al)

First, let us load the packages.

```{r}
library(devtools)
#install.packages("lme4", type = "source")
#install_github("naoki-egami/dsl", ref = "update-Aug2024", dependencies = TRUE, force = TRUE)
library(dsl)
library(janitor)
library(estimatr)
library(tidyverse)
library(broom)
library(jtools)
library(tidyverse)
library(ggplot2)
library(broom.mixed)
```

Next, we read in two tables: the first holds the annotations generated by the ResNet application and the second holds the human annotations, both from Won et al. (2017).

```{r}
# Machine-generated annotations, one row per image.
won_machine_df <- read.csv(file = "~/Downloads/DSL images/WonetAl_result.csv")

# Human annotations. row_to_names() promotes the first row of the table to
# column names and removes it from the data.
won_human_df <- read.table(
  file = "~/Downloads/DSL images/annot_test.txt",
  sep = "\t",
  quote = "",
  comment.char = ""
) %>%
  as.data.frame() %>%
  row_to_names(row_number = 1)

# Attach the human `protest` annotation to the machine table. The columns are
# matched by row position, so this assumes both files list the images in the
# same order -- TODO confirm.
won_machine_df["protest_h"] <- won_human_df["protest"]

# Keep only the rows whose human `protest` annotation equals 1.
won_protest_human <- subset(won_human_df, protest == 1)
won_protest_machine <- subset(won_machine_df, protest_h == 1)
```

Now, since we are only interested in estimating the perceived level of violence, let's copy the human-annotated covariates into the data frame that holds the surrogate (machine) predictions.

```{r}
# Human-annotated covariates used on the right-hand side of every regression.
covariate_cols <- c(
  "sign", "photo", "fire", "police", "children",
  "group_20", "flag", "night", "shouting"
)

# Copy them, in this order, from the human table into the machine table.
# Values are matched by row position, not by an image key.
won_protest_machine[covariate_cols] <- won_protest_human[covariate_cols]
```

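Now, let us write a function that first draws a bootstrap sample of the rows (sampling with replacement) and then randomly chooses, without replacement, which $N$ of those rows keep their human label; the label is set to `NA` on all other rows.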

```{r}
#' Bootstrap the protest data and keep the human label on only `N` rows
#'
#' Seeds the RNG, attaches the human violence label to the machine data,
#' resamples the rows with replacement (same number of rows as the input), and
#' then sets `violence_truth` to `NA` on a random `nrow - N` of the resampled
#' rows.
#'
#' @param won_protest_machine Data frame of machine predictions, one row per
#'   image.
#' @param N Number of resampled rows whose `violence_truth` is left untouched.
#'   Must be a single number between 0 and `nrow(won_protest_machine)`.
#' @param seed Passed to `set.seed()`. Note that this resets the session RNG.
#' @param violence_truth Human labels, aligned row-by-row with
#'   `won_protest_machine`. Defaults to the global
#'   `won_protest_human$violence`, which is what earlier versions of this
#'   function always read.
#' @return The resampled data frame with two added columns: `violence_truth`
#'   (`NA` on the unlabeled rows) and `row` (position within the resample).
won_sampling <- function(won_protest_machine, N, seed,
                         violence_truth = won_protest_human$violence) {
  n_rows <- nrow(won_protest_machine)

  # Fail early with a readable message instead of a cryptic sample() error.
  if (!is.numeric(N) || length(N) != 1L || is.na(N) || N < 0 || N > n_rows) {
    stop(
      "`N` must be a single number between 0 and nrow(won_protest_machine) (",
      n_rows, ").",
      call. = FALSE
    )
  }
  if (length(violence_truth) != n_rows) {
    stop(
      "`violence_truth` must have one value per row of `won_protest_machine`.",
      call. = FALSE
    )
  }

  set.seed(seed)
  won_protest_machine$violence_truth <- violence_truth

  # First RNG draw: bootstrap the rows (with replacement).
  boot_rows <- sample.int(n_rows, size = n_rows, replace = TRUE)
  resampled <- won_protest_machine[boot_rows, ]

  # Second RNG draw: choose (without replacement) the rows that lose their
  # human label, leaving N rows labeled.
  unlabeled <- sample.int(n_rows, size = n_rows - N)

  resampled$row <- seq_len(n_rows)
  resampled$violence_truth[unlabeled] <- NA

  resampled
}
```

Now let us define how many times we want to run these simulations and set a reproducible vector of seeds.

```{r}
# How many simulation runs to perform
k <- 450

# Seed the RNG once, then draw one distinct seed per run so that every run
# can be reproduced on its own
set.seed(233282238)
seeds <- sample.int(99999, size = k)
```

Let us run DSL, together with the labeled-only and surrogate-only regressions, for $N = 200, 300, 400, \dots$ labeled observations.

```{r}
N <- 200

# Every results table has one row per statistic and one column per run.
stat_rows <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
empty_runs <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = stat_rows)
}

# DSL estimates
multiple_runs_sign_200 <- empty_runs()
multiple_runs_photo_200 <- empty_runs()
multiple_runs_fire_200 <- empty_runs()
multiple_runs_police_200 <- empty_runs()
multiple_runs_children_200 <- empty_runs()
multiple_runs_group_200 <- empty_runs()
multiple_runs_flag_200 <- empty_runs()
multiple_runs_night_200 <- empty_runs()
multiple_runs_shouting_200 <- empty_runs()

# Labeled-only regression estimates
smultiple_runs_sign_200 <- empty_runs()
smultiple_runs_photo_200 <- empty_runs()
smultiple_runs_fire_200 <- empty_runs()
smultiple_runs_police_200 <- empty_runs()
smultiple_runs_children_200 <- empty_runs()
smultiple_runs_group_200 <- empty_runs()
smultiple_runs_flag_200 <- empty_runs()
smultiple_runs_night_200 <- empty_runs()
smultiple_runs_shouting_200 <- empty_runs()

# Surrogate-only regression estimates
rmultiple_runs_sign_200 <- empty_runs()
rmultiple_runs_photo_200 <- empty_runs()
rmultiple_runs_fire_200 <- empty_runs()
rmultiple_runs_police_200 <- empty_runs()
rmultiple_runs_children_200 <- empty_runs()
rmultiple_runs_group_200 <- empty_runs()
rmultiple_runs_flag_200 <- empty_runs()
rmultiple_runs_night_200 <- empty_runs()
rmultiple_runs_shouting_200 <- empty_runs()

# Columns to read from each coefficient table, in the order of `stat_rows`:
# `dsl_cols` for data.frame(summary(<dsl fit>)), `tidy_cols` for tidy() output.
dsl_cols <- c("Estimate", "Std..Error", "CI.Lower", "CI.Upper", "p.value")
tidy_cols <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")

# Pull the five statistics for coefficient row `j` of `tbl`.
# `exact = FALSE` keeps the partial-matching lookup that `tbl$name` performs.
coef_stats <- function(tbl, j, cols) {
  unlist(lapply(cols, function(col) tbl[[col, exact = FALSE]][j]))
}

# Same covariates for every model; only the outcome differs.
truth_formula <- violence_truth ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting
surrogate_formula <- violence ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting

# Coefficient rows are addressed by position: rows 2-10 are taken to be the
# nine covariates in formula order (row 1 presumably the intercept). This only
# holds while every covariate contributes exactly one coefficient.
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # DSL
  dsl_model_i <- dsl(
    model = "lm",
    formula = truth_formula,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  multiple_runs_sign_200[, i] <- coef_stats(d, 2, dsl_cols)
  multiple_runs_photo_200[, i] <- coef_stats(d, 3, dsl_cols)
  multiple_runs_fire_200[, i] <- coef_stats(d, 4, dsl_cols)
  multiple_runs_police_200[, i] <- coef_stats(d, 5, dsl_cols)
  multiple_runs_children_200[, i] <- coef_stats(d, 6, dsl_cols)
  multiple_runs_group_200[, i] <- coef_stats(d, 7, dsl_cols)
  multiple_runs_flag_200[, i] <- coef_stats(d, 8, dsl_cols)
  multiple_runs_night_200[, i] <- coef_stats(d, 9, dsl_cols)
  multiple_runs_shouting_200[, i] <- coef_stats(d, 10, dsl_cols)

  # Labeled-only model: `violence_truth` is NA on the unlabeled rows
  sub_model_i <- lm_robust(formula = truth_formula, data = df)
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_200[, i] <- coef_stats(d, 2, tidy_cols)
  smultiple_runs_photo_200[, i] <- coef_stats(d, 3, tidy_cols)
  smultiple_runs_fire_200[, i] <- coef_stats(d, 4, tidy_cols)
  smultiple_runs_police_200[, i] <- coef_stats(d, 5, tidy_cols)
  smultiple_runs_children_200[, i] <- coef_stats(d, 6, tidy_cols)
  smultiple_runs_group_200[, i] <- coef_stats(d, 7, tidy_cols)
  smultiple_runs_flag_200[, i] <- coef_stats(d, 8, tidy_cols)
  smultiple_runs_night_200[, i] <- coef_stats(d, 9, tidy_cols)
  smultiple_runs_shouting_200[, i] <- coef_stats(d, 10, tidy_cols)

  # Surrogate model: machine-predicted `violence` as the outcome
  sur_model_i <- lm_robust(formula = surrogate_formula, data = df)
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_200[, i] <- coef_stats(d, 2, tidy_cols)
  rmultiple_runs_photo_200[, i] <- coef_stats(d, 3, tidy_cols)
  rmultiple_runs_fire_200[, i] <- coef_stats(d, 4, tidy_cols)
  rmultiple_runs_police_200[, i] <- coef_stats(d, 5, tidy_cols)
  rmultiple_runs_children_200[, i] <- coef_stats(d, 6, tidy_cols)
  rmultiple_runs_group_200[, i] <- coef_stats(d, 7, tidy_cols)
  rmultiple_runs_flag_200[, i] <- coef_stats(d, 8, tidy_cols)
  rmultiple_runs_night_200[, i] <- coef_stats(d, 9, tidy_cols)
  rmultiple_runs_shouting_200[, i] <- coef_stats(d, 10, tidy_cols)

  print(i) # progress
}



N <- 300

# Every results table has one row per statistic and one column per run.
stat_rows <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
empty_runs <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = stat_rows)
}

# DSL estimates
multiple_runs_sign_300 <- empty_runs()
multiple_runs_photo_300 <- empty_runs()
multiple_runs_fire_300 <- empty_runs()
multiple_runs_police_300 <- empty_runs()
multiple_runs_children_300 <- empty_runs()
multiple_runs_group_300 <- empty_runs()
multiple_runs_flag_300 <- empty_runs()
multiple_runs_night_300 <- empty_runs()
multiple_runs_shouting_300 <- empty_runs()

# Labeled-only regression estimates
smultiple_runs_sign_300 <- empty_runs()
smultiple_runs_photo_300 <- empty_runs()
smultiple_runs_fire_300 <- empty_runs()
smultiple_runs_police_300 <- empty_runs()
smultiple_runs_children_300 <- empty_runs()
smultiple_runs_group_300 <- empty_runs()
smultiple_runs_flag_300 <- empty_runs()
smultiple_runs_night_300 <- empty_runs()
smultiple_runs_shouting_300 <- empty_runs()

# Surrogate-only regression estimates
rmultiple_runs_sign_300 <- empty_runs()
rmultiple_runs_photo_300 <- empty_runs()
rmultiple_runs_fire_300 <- empty_runs()
rmultiple_runs_police_300 <- empty_runs()
rmultiple_runs_children_300 <- empty_runs()
rmultiple_runs_group_300 <- empty_runs()
rmultiple_runs_flag_300 <- empty_runs()
rmultiple_runs_night_300 <- empty_runs()
rmultiple_runs_shouting_300 <- empty_runs()

# Columns to read from each coefficient table, in the order of `stat_rows`:
# `dsl_cols` for data.frame(summary(<dsl fit>)), `tidy_cols` for tidy() output.
dsl_cols <- c("Estimate", "Std..Error", "CI.Lower", "CI.Upper", "p.value")
tidy_cols <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")

# Pull the five statistics for coefficient row `j` of `tbl`.
# `exact = FALSE` keeps the partial-matching lookup that `tbl$name` performs.
coef_stats <- function(tbl, j, cols) {
  unlist(lapply(cols, function(col) tbl[[col, exact = FALSE]][j]))
}

# Same covariates for every model; only the outcome differs.
truth_formula <- violence_truth ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting
surrogate_formula <- violence ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting

# Coefficient rows are addressed by position: rows 2-10 are taken to be the
# nine covariates in formula order (row 1 presumably the intercept). This only
# holds while every covariate contributes exactly one coefficient.
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # DSL
  dsl_model_i <- dsl(
    model = "lm",
    formula = truth_formula,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  multiple_runs_sign_300[, i] <- coef_stats(d, 2, dsl_cols)
  multiple_runs_photo_300[, i] <- coef_stats(d, 3, dsl_cols)
  multiple_runs_fire_300[, i] <- coef_stats(d, 4, dsl_cols)
  multiple_runs_police_300[, i] <- coef_stats(d, 5, dsl_cols)
  multiple_runs_children_300[, i] <- coef_stats(d, 6, dsl_cols)
  multiple_runs_group_300[, i] <- coef_stats(d, 7, dsl_cols)
  multiple_runs_flag_300[, i] <- coef_stats(d, 8, dsl_cols)
  multiple_runs_night_300[, i] <- coef_stats(d, 9, dsl_cols)
  multiple_runs_shouting_300[, i] <- coef_stats(d, 10, dsl_cols)

  # Labeled-only model: `violence_truth` is NA on the unlabeled rows
  sub_model_i <- lm_robust(formula = truth_formula, data = df)
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_300[, i] <- coef_stats(d, 2, tidy_cols)
  smultiple_runs_photo_300[, i] <- coef_stats(d, 3, tidy_cols)
  smultiple_runs_fire_300[, i] <- coef_stats(d, 4, tidy_cols)
  smultiple_runs_police_300[, i] <- coef_stats(d, 5, tidy_cols)
  smultiple_runs_children_300[, i] <- coef_stats(d, 6, tidy_cols)
  smultiple_runs_group_300[, i] <- coef_stats(d, 7, tidy_cols)
  smultiple_runs_flag_300[, i] <- coef_stats(d, 8, tidy_cols)
  smultiple_runs_night_300[, i] <- coef_stats(d, 9, tidy_cols)
  smultiple_runs_shouting_300[, i] <- coef_stats(d, 10, tidy_cols)

  # Surrogate model: machine-predicted `violence` as the outcome
  sur_model_i <- lm_robust(formula = surrogate_formula, data = df)
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_300[, i] <- coef_stats(d, 2, tidy_cols)
  rmultiple_runs_photo_300[, i] <- coef_stats(d, 3, tidy_cols)
  rmultiple_runs_fire_300[, i] <- coef_stats(d, 4, tidy_cols)
  rmultiple_runs_police_300[, i] <- coef_stats(d, 5, tidy_cols)
  rmultiple_runs_children_300[, i] <- coef_stats(d, 6, tidy_cols)
  rmultiple_runs_group_300[, i] <- coef_stats(d, 7, tidy_cols)
  rmultiple_runs_flag_300[, i] <- coef_stats(d, 8, tidy_cols)
  rmultiple_runs_night_300[, i] <- coef_stats(d, 9, tidy_cols)
  rmultiple_runs_shouting_300[, i] <- coef_stats(d, 10, tidy_cols)

  print(i) # progress
}





N <- 400

# Every results table has one row per statistic and one column per run.
stat_rows <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
empty_runs <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = stat_rows)
}

# DSL estimates
multiple_runs_sign_400 <- empty_runs()
multiple_runs_photo_400 <- empty_runs()
multiple_runs_fire_400 <- empty_runs()
multiple_runs_police_400 <- empty_runs()
multiple_runs_children_400 <- empty_runs()
multiple_runs_group_400 <- empty_runs()
multiple_runs_flag_400 <- empty_runs()
multiple_runs_night_400 <- empty_runs()
multiple_runs_shouting_400 <- empty_runs()

# Labeled-only regression estimates
smultiple_runs_sign_400 <- empty_runs()
smultiple_runs_photo_400 <- empty_runs()
smultiple_runs_fire_400 <- empty_runs()
smultiple_runs_police_400 <- empty_runs()
smultiple_runs_children_400 <- empty_runs()
smultiple_runs_group_400 <- empty_runs()
smultiple_runs_flag_400 <- empty_runs()
smultiple_runs_night_400 <- empty_runs()
smultiple_runs_shouting_400 <- empty_runs()

# Surrogate-only regression estimates
rmultiple_runs_sign_400 <- empty_runs()
rmultiple_runs_photo_400 <- empty_runs()
rmultiple_runs_fire_400 <- empty_runs()
rmultiple_runs_police_400 <- empty_runs()
rmultiple_runs_children_400 <- empty_runs()
rmultiple_runs_group_400 <- empty_runs()
rmultiple_runs_flag_400 <- empty_runs()
rmultiple_runs_night_400 <- empty_runs()
rmultiple_runs_shouting_400 <- empty_runs()

# Columns to read from each coefficient table, in the order of `stat_rows`:
# `dsl_cols` for data.frame(summary(<dsl fit>)), `tidy_cols` for tidy() output.
dsl_cols <- c("Estimate", "Std..Error", "CI.Lower", "CI.Upper", "p.value")
tidy_cols <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")

# Pull the five statistics for coefficient row `j` of `tbl`.
# `exact = FALSE` keeps the partial-matching lookup that `tbl$name` performs.
coef_stats <- function(tbl, j, cols) {
  unlist(lapply(cols, function(col) tbl[[col, exact = FALSE]][j]))
}

# Same covariates for every model; only the outcome differs.
truth_formula <- violence_truth ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting
surrogate_formula <- violence ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting

# Coefficient rows are addressed by position: rows 2-10 are taken to be the
# nine covariates in formula order (row 1 presumably the intercept). This only
# holds while every covariate contributes exactly one coefficient.
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # DSL
  dsl_model_i <- dsl(
    model = "lm",
    formula = truth_formula,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  multiple_runs_sign_400[, i] <- coef_stats(d, 2, dsl_cols)
  multiple_runs_photo_400[, i] <- coef_stats(d, 3, dsl_cols)
  multiple_runs_fire_400[, i] <- coef_stats(d, 4, dsl_cols)
  multiple_runs_police_400[, i] <- coef_stats(d, 5, dsl_cols)
  multiple_runs_children_400[, i] <- coef_stats(d, 6, dsl_cols)
  multiple_runs_group_400[, i] <- coef_stats(d, 7, dsl_cols)
  multiple_runs_flag_400[, i] <- coef_stats(d, 8, dsl_cols)
  multiple_runs_night_400[, i] <- coef_stats(d, 9, dsl_cols)
  multiple_runs_shouting_400[, i] <- coef_stats(d, 10, dsl_cols)

  # Labeled-only model: `violence_truth` is NA on the unlabeled rows
  sub_model_i <- lm_robust(formula = truth_formula, data = df)
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_400[, i] <- coef_stats(d, 2, tidy_cols)
  smultiple_runs_photo_400[, i] <- coef_stats(d, 3, tidy_cols)
  smultiple_runs_fire_400[, i] <- coef_stats(d, 4, tidy_cols)
  smultiple_runs_police_400[, i] <- coef_stats(d, 5, tidy_cols)
  smultiple_runs_children_400[, i] <- coef_stats(d, 6, tidy_cols)
  smultiple_runs_group_400[, i] <- coef_stats(d, 7, tidy_cols)
  smultiple_runs_flag_400[, i] <- coef_stats(d, 8, tidy_cols)
  smultiple_runs_night_400[, i] <- coef_stats(d, 9, tidy_cols)
  smultiple_runs_shouting_400[, i] <- coef_stats(d, 10, tidy_cols)

  # Surrogate model: machine-predicted `violence` as the outcome
  sur_model_i <- lm_robust(formula = surrogate_formula, data = df)
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_400[, i] <- coef_stats(d, 2, tidy_cols)
  rmultiple_runs_photo_400[, i] <- coef_stats(d, 3, tidy_cols)
  rmultiple_runs_fire_400[, i] <- coef_stats(d, 4, tidy_cols)
  rmultiple_runs_police_400[, i] <- coef_stats(d, 5, tidy_cols)
  rmultiple_runs_children_400[, i] <- coef_stats(d, 6, tidy_cols)
  rmultiple_runs_group_400[, i] <- coef_stats(d, 7, tidy_cols)
  rmultiple_runs_flag_400[, i] <- coef_stats(d, 8, tidy_cols)
  rmultiple_runs_night_400[, i] <- coef_stats(d, 9, tidy_cols)
  rmultiple_runs_shouting_400[, i] <- coef_stats(d, 10, tidy_cols)

  print(i) # progress
}



N <- 500

# Every results table has one row per statistic and one column per run.
stat_rows <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
empty_runs <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = stat_rows)
}

# DSL estimates
multiple_runs_sign_500 <- empty_runs()
multiple_runs_photo_500 <- empty_runs()
multiple_runs_fire_500 <- empty_runs()
multiple_runs_police_500 <- empty_runs()
multiple_runs_children_500 <- empty_runs()
multiple_runs_group_500 <- empty_runs()
multiple_runs_flag_500 <- empty_runs()
multiple_runs_night_500 <- empty_runs()
multiple_runs_shouting_500 <- empty_runs()

# Labeled-only regression estimates
smultiple_runs_sign_500 <- empty_runs()
smultiple_runs_photo_500 <- empty_runs()
smultiple_runs_fire_500 <- empty_runs()
smultiple_runs_police_500 <- empty_runs()
smultiple_runs_children_500 <- empty_runs()
smultiple_runs_group_500 <- empty_runs()
smultiple_runs_flag_500 <- empty_runs()
smultiple_runs_night_500 <- empty_runs()
smultiple_runs_shouting_500 <- empty_runs()

# Surrogate-only regression estimates
rmultiple_runs_sign_500 <- empty_runs()
rmultiple_runs_photo_500 <- empty_runs()
rmultiple_runs_fire_500 <- empty_runs()
rmultiple_runs_police_500 <- empty_runs()
rmultiple_runs_children_500 <- empty_runs()
rmultiple_runs_group_500 <- empty_runs()
rmultiple_runs_flag_500 <- empty_runs()
rmultiple_runs_night_500 <- empty_runs()
rmultiple_runs_shouting_500 <- empty_runs()

# Columns to read from each coefficient table, in the order of `stat_rows`:
# `dsl_cols` for data.frame(summary(<dsl fit>)), `tidy_cols` for tidy() output.
dsl_cols <- c("Estimate", "Std..Error", "CI.Lower", "CI.Upper", "p.value")
tidy_cols <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")

# Pull the five statistics for coefficient row `j` of `tbl`.
# `exact = FALSE` keeps the partial-matching lookup that `tbl$name` performs.
coef_stats <- function(tbl, j, cols) {
  unlist(lapply(cols, function(col) tbl[[col, exact = FALSE]][j]))
}

# Same covariates for every model; only the outcome differs.
truth_formula <- violence_truth ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting
surrogate_formula <- violence ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting

# Coefficient rows are addressed by position: rows 2-10 are taken to be the
# nine covariates in formula order (row 1 presumably the intercept). This only
# holds while every covariate contributes exactly one coefficient.
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # DSL
  dsl_model_i <- dsl(
    model = "lm",
    formula = truth_formula,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  multiple_runs_sign_500[, i] <- coef_stats(d, 2, dsl_cols)
  multiple_runs_photo_500[, i] <- coef_stats(d, 3, dsl_cols)
  multiple_runs_fire_500[, i] <- coef_stats(d, 4, dsl_cols)
  multiple_runs_police_500[, i] <- coef_stats(d, 5, dsl_cols)
  multiple_runs_children_500[, i] <- coef_stats(d, 6, dsl_cols)
  multiple_runs_group_500[, i] <- coef_stats(d, 7, dsl_cols)
  multiple_runs_flag_500[, i] <- coef_stats(d, 8, dsl_cols)
  multiple_runs_night_500[, i] <- coef_stats(d, 9, dsl_cols)
  multiple_runs_shouting_500[, i] <- coef_stats(d, 10, dsl_cols)

  # Labeled-only model: `violence_truth` is NA on the unlabeled rows
  sub_model_i <- lm_robust(formula = truth_formula, data = df)
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_500[, i] <- coef_stats(d, 2, tidy_cols)
  smultiple_runs_photo_500[, i] <- coef_stats(d, 3, tidy_cols)
  smultiple_runs_fire_500[, i] <- coef_stats(d, 4, tidy_cols)
  smultiple_runs_police_500[, i] <- coef_stats(d, 5, tidy_cols)
  smultiple_runs_children_500[, i] <- coef_stats(d, 6, tidy_cols)
  smultiple_runs_group_500[, i] <- coef_stats(d, 7, tidy_cols)
  smultiple_runs_flag_500[, i] <- coef_stats(d, 8, tidy_cols)
  smultiple_runs_night_500[, i] <- coef_stats(d, 9, tidy_cols)
  smultiple_runs_shouting_500[, i] <- coef_stats(d, 10, tidy_cols)

  # Surrogate model: machine-predicted `violence` as the outcome
  sur_model_i <- lm_robust(formula = surrogate_formula, data = df)
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_500[, i] <- coef_stats(d, 2, tidy_cols)
  rmultiple_runs_photo_500[, i] <- coef_stats(d, 3, tidy_cols)
  rmultiple_runs_fire_500[, i] <- coef_stats(d, 4, tidy_cols)
  rmultiple_runs_police_500[, i] <- coef_stats(d, 5, tidy_cols)
  rmultiple_runs_children_500[, i] <- coef_stats(d, 6, tidy_cols)
  rmultiple_runs_group_500[, i] <- coef_stats(d, 7, tidy_cols)
  rmultiple_runs_flag_500[, i] <- coef_stats(d, 8, tidy_cols)
  rmultiple_runs_night_500[, i] <- coef_stats(d, 9, tidy_cols)
  rmultiple_runs_shouting_500[, i] <- coef_stats(d, 10, tidy_cols)

  print(i) # progress
}


# Labeled-sample size for this batch of runs: N is handed to won_sampling() in
# the loop below. (The number of repetitions is `k`, which fixes the column
# count of every table allocated here.)
N <- 600

# Each results table is 5 x k and starts out all NA: one row per statistic,
# one column per run.
run_stat_names <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
new_runs_table <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = run_stat_names)
}

# multiple_runs_*: filled from summary(dsl(...)) in the loop below.
multiple_runs_sign_600 <- new_runs_table()
multiple_runs_photo_600 <- new_runs_table()
multiple_runs_fire_600 <- new_runs_table()
multiple_runs_police_600 <- new_runs_table()
multiple_runs_children_600 <- new_runs_table()
multiple_runs_group_600 <- new_runs_table()
multiple_runs_flag_600 <- new_runs_table()
multiple_runs_night_600 <- new_runs_table()
multiple_runs_shouting_600 <- new_runs_table()

# smultiple_runs_*: filled from the lm_robust fit of violence_truth.
smultiple_runs_sign_600 <- new_runs_table()
smultiple_runs_photo_600 <- new_runs_table()
smultiple_runs_fire_600 <- new_runs_table()
smultiple_runs_police_600 <- new_runs_table()
smultiple_runs_children_600 <- new_runs_table()
smultiple_runs_group_600 <- new_runs_table()
smultiple_runs_flag_600 <- new_runs_table()
smultiple_runs_night_600 <- new_runs_table()
smultiple_runs_shouting_600 <- new_runs_table()

# rmultiple_runs_*: filled from the lm_robust fit of the surrogate outcome
# `violence`.
rmultiple_runs_sign_600 <- new_runs_table()
rmultiple_runs_photo_600 <- new_runs_table()
rmultiple_runs_fire_600 <- new_runs_table()
rmultiple_runs_police_600 <- new_runs_table()
rmultiple_runs_children_600 <- new_runs_table()
rmultiple_runs_group_600 <- new_runs_table()
rmultiple_runs_flag_600 <- new_runs_table()
rmultiple_runs_night_600 <- new_runs_table()
rmultiple_runs_shouting_600 <- new_runs_table()



# Repeated-sampling comparison at the current N (600): for every seed, draw a
# sample with won_sampling() and fit three models with the same covariates:
#   1. dsl() on violence_truth, using `violence` as the prediction,
#   2. lm_robust() on violence_truth,
#   3. lm_robust() on the surrogate outcome `violence`.
# Column i of each results table receives the five statistics from run i.

# Collect (estimate, std. error, CI lower, CI upper, p-value) for coefficient
# row `j` of a data.frame(summary(dsl_fit)) table.
dsl_coef_stats <- function(tab, j) {
  c(
    tab$Estimate[j], tab$Std..Error[j],
    tab$CI.Lower[j], tab$CI.Upper[j], tab$p.value[j]
  )
}

# Same five statistics for coefficient row `j` of a tidy(lm_robust_fit) table.
lm_coef_stats <- function(tab, j) {
  c(
    tab$estimate[j], tab$std.error[j],
    tab$conf.low[j], tab$conf.high[j], tab$p.value[j]
  )
}

# seq_len() rather than 1:k so that k == 0 runs zero iterations instead of
# iterating over c(1, 0).
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # --- DSL estimator ---
  dsl_model_i <- dsl(
    model = "lm",
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  # Rows 2..10 are read positionally, in formula order; row 1 is skipped.
  # NOTE(review): assumes row 1 is the intercept and each covariate contributes
  # exactly one coefficient row -- confirm against the fitted output.
  multiple_runs_sign_600[, i] <- dsl_coef_stats(d, 2)
  multiple_runs_photo_600[, i] <- dsl_coef_stats(d, 3)
  multiple_runs_fire_600[, i] <- dsl_coef_stats(d, 4)
  multiple_runs_police_600[, i] <- dsl_coef_stats(d, 5)
  multiple_runs_children_600[, i] <- dsl_coef_stats(d, 6)
  multiple_runs_group_600[, i] <- dsl_coef_stats(d, 7)
  multiple_runs_flag_600[, i] <- dsl_coef_stats(d, 8)
  multiple_runs_night_600[, i] <- dsl_coef_stats(d, 9)
  multiple_runs_shouting_600[, i] <- dsl_coef_stats(d, 10)
  # Drop the references to the DSL fit before fitting the next model.
  dsl_model_i <- NA
  d <- NA

  # --- lm_robust on violence_truth ---
  sub_model_i <- lm_robust(
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_600[, i] <- lm_coef_stats(d, 2)
  smultiple_runs_photo_600[, i] <- lm_coef_stats(d, 3)
  smultiple_runs_fire_600[, i] <- lm_coef_stats(d, 4)
  smultiple_runs_police_600[, i] <- lm_coef_stats(d, 5)
  smultiple_runs_children_600[, i] <- lm_coef_stats(d, 6)
  smultiple_runs_group_600[, i] <- lm_coef_stats(d, 7)
  smultiple_runs_flag_600[, i] <- lm_coef_stats(d, 8)
  smultiple_runs_night_600[, i] <- lm_coef_stats(d, 9)
  smultiple_runs_shouting_600[, i] <- lm_coef_stats(d, 10)

  # --- surrogate model: lm_robust on the predicted outcome `violence` ---
  sur_model_i <- lm_robust(
    formula = violence ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_600[, i] <- lm_coef_stats(d, 2)
  rmultiple_runs_photo_600[, i] <- lm_coef_stats(d, 3)
  rmultiple_runs_fire_600[, i] <- lm_coef_stats(d, 4)
  rmultiple_runs_police_600[, i] <- lm_coef_stats(d, 5)
  rmultiple_runs_children_600[, i] <- lm_coef_stats(d, 6)
  rmultiple_runs_group_600[, i] <- lm_coef_stats(d, 7)
  rmultiple_runs_flag_600[, i] <- lm_coef_stats(d, 8)
  rmultiple_runs_night_600[, i] <- lm_coef_stats(d, 9)
  rmultiple_runs_shouting_600[, i] <- lm_coef_stats(d, 10)
  sur_model_i <- NA
  d <- NA

  # Progress indicator: index of the run that just finished.
  print(i)
}


# Labeled-sample size for this batch of runs: N is handed to won_sampling() in
# the loop below.
N <- 700

# Each results table is 5 x k and starts out all NA: one row per statistic,
# one column per run.
run_stat_names <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
new_runs_table <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = run_stat_names)
}

# multiple_runs_*: filled from summary(dsl(...)) in the loop below.
multiple_runs_sign_700 <- new_runs_table()
multiple_runs_photo_700 <- new_runs_table()
multiple_runs_fire_700 <- new_runs_table()
multiple_runs_police_700 <- new_runs_table()
multiple_runs_children_700 <- new_runs_table()
multiple_runs_group_700 <- new_runs_table()
multiple_runs_flag_700 <- new_runs_table()
multiple_runs_night_700 <- new_runs_table()
multiple_runs_shouting_700 <- new_runs_table()

# smultiple_runs_*: filled from the lm_robust fit of violence_truth.
smultiple_runs_sign_700 <- new_runs_table()
smultiple_runs_photo_700 <- new_runs_table()
smultiple_runs_fire_700 <- new_runs_table()
smultiple_runs_police_700 <- new_runs_table()
smultiple_runs_children_700 <- new_runs_table()
smultiple_runs_group_700 <- new_runs_table()
smultiple_runs_flag_700 <- new_runs_table()
smultiple_runs_night_700 <- new_runs_table()
smultiple_runs_shouting_700 <- new_runs_table()

# rmultiple_runs_*: filled from the lm_robust fit of the surrogate outcome
# `violence`.
rmultiple_runs_sign_700 <- new_runs_table()
rmultiple_runs_photo_700 <- new_runs_table()
rmultiple_runs_fire_700 <- new_runs_table()
rmultiple_runs_police_700 <- new_runs_table()
rmultiple_runs_children_700 <- new_runs_table()
rmultiple_runs_group_700 <- new_runs_table()
rmultiple_runs_flag_700 <- new_runs_table()
rmultiple_runs_night_700 <- new_runs_table()
rmultiple_runs_shouting_700 <- new_runs_table()


# Repeated-sampling comparison at the current N (700): for every seed, draw a
# sample with won_sampling() and fit three models with the same covariates:
#   1. dsl() on violence_truth, using `violence` as the prediction,
#   2. lm_robust() on violence_truth,
#   3. lm_robust() on the surrogate outcome `violence`.
# Column i of each results table receives the five statistics from run i.

# Collect (estimate, std. error, CI lower, CI upper, p-value) for coefficient
# row `j` of a data.frame(summary(dsl_fit)) table.
dsl_coef_stats <- function(tab, j) {
  c(
    tab$Estimate[j], tab$Std..Error[j],
    tab$CI.Lower[j], tab$CI.Upper[j], tab$p.value[j]
  )
}

# Same five statistics for coefficient row `j` of a tidy(lm_robust_fit) table.
lm_coef_stats <- function(tab, j) {
  c(
    tab$estimate[j], tab$std.error[j],
    tab$conf.low[j], tab$conf.high[j], tab$p.value[j]
  )
}

# seq_len() rather than 1:k so that k == 0 runs zero iterations instead of
# iterating over c(1, 0).
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # --- DSL estimator ---
  dsl_model_i <- dsl(
    model = "lm",
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  # Rows 2..10 are read positionally, in formula order; row 1 is skipped.
  # NOTE(review): assumes row 1 is the intercept and each covariate contributes
  # exactly one coefficient row -- confirm against the fitted output.
  multiple_runs_sign_700[, i] <- dsl_coef_stats(d, 2)
  multiple_runs_photo_700[, i] <- dsl_coef_stats(d, 3)
  multiple_runs_fire_700[, i] <- dsl_coef_stats(d, 4)
  multiple_runs_police_700[, i] <- dsl_coef_stats(d, 5)
  multiple_runs_children_700[, i] <- dsl_coef_stats(d, 6)
  multiple_runs_group_700[, i] <- dsl_coef_stats(d, 7)
  multiple_runs_flag_700[, i] <- dsl_coef_stats(d, 8)
  multiple_runs_night_700[, i] <- dsl_coef_stats(d, 9)
  multiple_runs_shouting_700[, i] <- dsl_coef_stats(d, 10)
  # Drop the references to the DSL fit before fitting the next model.
  dsl_model_i <- NA
  d <- NA

  # --- lm_robust on violence_truth ---
  sub_model_i <- lm_robust(
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_700[, i] <- lm_coef_stats(d, 2)
  smultiple_runs_photo_700[, i] <- lm_coef_stats(d, 3)
  smultiple_runs_fire_700[, i] <- lm_coef_stats(d, 4)
  smultiple_runs_police_700[, i] <- lm_coef_stats(d, 5)
  smultiple_runs_children_700[, i] <- lm_coef_stats(d, 6)
  smultiple_runs_group_700[, i] <- lm_coef_stats(d, 7)
  smultiple_runs_flag_700[, i] <- lm_coef_stats(d, 8)
  smultiple_runs_night_700[, i] <- lm_coef_stats(d, 9)
  smultiple_runs_shouting_700[, i] <- lm_coef_stats(d, 10)

  # --- surrogate model: lm_robust on the predicted outcome `violence` ---
  sur_model_i <- lm_robust(
    formula = violence ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_700[, i] <- lm_coef_stats(d, 2)
  rmultiple_runs_photo_700[, i] <- lm_coef_stats(d, 3)
  rmultiple_runs_fire_700[, i] <- lm_coef_stats(d, 4)
  rmultiple_runs_police_700[, i] <- lm_coef_stats(d, 5)
  rmultiple_runs_children_700[, i] <- lm_coef_stats(d, 6)
  rmultiple_runs_group_700[, i] <- lm_coef_stats(d, 7)
  rmultiple_runs_flag_700[, i] <- lm_coef_stats(d, 8)
  rmultiple_runs_night_700[, i] <- lm_coef_stats(d, 9)
  rmultiple_runs_shouting_700[, i] <- lm_coef_stats(d, 10)
  sur_model_i <- NA
  d <- NA

  # Progress indicator: index of the run that just finished.
  print(i)
}



# Labeled-sample size for this batch of runs: N is handed to won_sampling() in
# the loop below.
N <- 800

# Each results table is 5 x k and starts out all NA: one row per statistic,
# one column per run.
run_stat_names <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
new_runs_table <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = run_stat_names)
}

# multiple_runs_*: filled from summary(dsl(...)) in the loop below.
multiple_runs_sign_800 <- new_runs_table()
multiple_runs_photo_800 <- new_runs_table()
multiple_runs_fire_800 <- new_runs_table()
multiple_runs_police_800 <- new_runs_table()
multiple_runs_children_800 <- new_runs_table()
multiple_runs_group_800 <- new_runs_table()
multiple_runs_flag_800 <- new_runs_table()
multiple_runs_night_800 <- new_runs_table()
multiple_runs_shouting_800 <- new_runs_table()

# smultiple_runs_*: filled from the lm_robust fit of violence_truth.
smultiple_runs_sign_800 <- new_runs_table()
smultiple_runs_photo_800 <- new_runs_table()
smultiple_runs_fire_800 <- new_runs_table()
smultiple_runs_police_800 <- new_runs_table()
smultiple_runs_children_800 <- new_runs_table()
smultiple_runs_group_800 <- new_runs_table()
smultiple_runs_flag_800 <- new_runs_table()
smultiple_runs_night_800 <- new_runs_table()
smultiple_runs_shouting_800 <- new_runs_table()

# rmultiple_runs_*: filled from the lm_robust fit of the surrogate outcome
# `violence`.
rmultiple_runs_sign_800 <- new_runs_table()
rmultiple_runs_photo_800 <- new_runs_table()
rmultiple_runs_fire_800 <- new_runs_table()
rmultiple_runs_police_800 <- new_runs_table()
rmultiple_runs_children_800 <- new_runs_table()
rmultiple_runs_group_800 <- new_runs_table()
rmultiple_runs_flag_800 <- new_runs_table()
rmultiple_runs_night_800 <- new_runs_table()
rmultiple_runs_shouting_800 <- new_runs_table()


# Repeated-sampling comparison at the current N (800): for every seed, draw a
# sample with won_sampling() and fit three models with the same covariates:
#   1. dsl() on violence_truth, using `violence` as the prediction,
#   2. lm_robust() on violence_truth,
#   3. lm_robust() on the surrogate outcome `violence`.
# Column i of each results table receives the five statistics from run i.

# Collect (estimate, std. error, CI lower, CI upper, p-value) for coefficient
# row `j` of a data.frame(summary(dsl_fit)) table.
dsl_coef_stats <- function(tab, j) {
  c(
    tab$Estimate[j], tab$Std..Error[j],
    tab$CI.Lower[j], tab$CI.Upper[j], tab$p.value[j]
  )
}

# Same five statistics for coefficient row `j` of a tidy(lm_robust_fit) table.
lm_coef_stats <- function(tab, j) {
  c(
    tab$estimate[j], tab$std.error[j],
    tab$conf.low[j], tab$conf.high[j], tab$p.value[j]
  )
}

# seq_len() rather than 1:k so that k == 0 runs zero iterations instead of
# iterating over c(1, 0).
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # --- DSL estimator ---
  dsl_model_i <- dsl(
    model = "lm",
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  # Rows 2..10 are read positionally, in formula order; row 1 is skipped.
  # NOTE(review): assumes row 1 is the intercept and each covariate contributes
  # exactly one coefficient row -- confirm against the fitted output.
  multiple_runs_sign_800[, i] <- dsl_coef_stats(d, 2)
  multiple_runs_photo_800[, i] <- dsl_coef_stats(d, 3)
  multiple_runs_fire_800[, i] <- dsl_coef_stats(d, 4)
  multiple_runs_police_800[, i] <- dsl_coef_stats(d, 5)
  multiple_runs_children_800[, i] <- dsl_coef_stats(d, 6)
  multiple_runs_group_800[, i] <- dsl_coef_stats(d, 7)
  multiple_runs_flag_800[, i] <- dsl_coef_stats(d, 8)
  multiple_runs_night_800[, i] <- dsl_coef_stats(d, 9)
  multiple_runs_shouting_800[, i] <- dsl_coef_stats(d, 10)
  # Drop the references to the DSL fit before fitting the next model.
  dsl_model_i <- NA
  d <- NA

  # --- lm_robust on violence_truth ---
  sub_model_i <- lm_robust(
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_800[, i] <- lm_coef_stats(d, 2)
  smultiple_runs_photo_800[, i] <- lm_coef_stats(d, 3)
  smultiple_runs_fire_800[, i] <- lm_coef_stats(d, 4)
  smultiple_runs_police_800[, i] <- lm_coef_stats(d, 5)
  smultiple_runs_children_800[, i] <- lm_coef_stats(d, 6)
  smultiple_runs_group_800[, i] <- lm_coef_stats(d, 7)
  smultiple_runs_flag_800[, i] <- lm_coef_stats(d, 8)
  smultiple_runs_night_800[, i] <- lm_coef_stats(d, 9)
  smultiple_runs_shouting_800[, i] <- lm_coef_stats(d, 10)

  # --- surrogate model: lm_robust on the predicted outcome `violence` ---
  sur_model_i <- lm_robust(
    formula = violence ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_800[, i] <- lm_coef_stats(d, 2)
  rmultiple_runs_photo_800[, i] <- lm_coef_stats(d, 3)
  rmultiple_runs_fire_800[, i] <- lm_coef_stats(d, 4)
  rmultiple_runs_police_800[, i] <- lm_coef_stats(d, 5)
  rmultiple_runs_children_800[, i] <- lm_coef_stats(d, 6)
  rmultiple_runs_group_800[, i] <- lm_coef_stats(d, 7)
  rmultiple_runs_flag_800[, i] <- lm_coef_stats(d, 8)
  rmultiple_runs_night_800[, i] <- lm_coef_stats(d, 9)
  rmultiple_runs_shouting_800[, i] <- lm_coef_stats(d, 10)
  sur_model_i <- NA
  d <- NA

  # Progress indicator: index of the run that just finished.
  print(i)
}




# Labeled-sample size for this batch of runs: N is handed to won_sampling() in
# the loop below.
N <- 900

# Each results table is 5 x k and starts out all NA: one row per statistic,
# one column per run.
run_stat_names <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
new_runs_table <- function() {
  data.frame(matrix(nrow = 5, ncol = k), row.names = run_stat_names)
}

# multiple_runs_*: filled from summary(dsl(...)) in the loop below.
multiple_runs_sign_900 <- new_runs_table()
multiple_runs_photo_900 <- new_runs_table()
multiple_runs_fire_900 <- new_runs_table()
multiple_runs_police_900 <- new_runs_table()
multiple_runs_children_900 <- new_runs_table()
multiple_runs_group_900 <- new_runs_table()
multiple_runs_flag_900 <- new_runs_table()
multiple_runs_night_900 <- new_runs_table()
multiple_runs_shouting_900 <- new_runs_table()

# smultiple_runs_*: filled from the lm_robust fit of violence_truth.
smultiple_runs_sign_900 <- new_runs_table()
smultiple_runs_photo_900 <- new_runs_table()
smultiple_runs_fire_900 <- new_runs_table()
smultiple_runs_police_900 <- new_runs_table()
smultiple_runs_children_900 <- new_runs_table()
smultiple_runs_group_900 <- new_runs_table()
smultiple_runs_flag_900 <- new_runs_table()
smultiple_runs_night_900 <- new_runs_table()
smultiple_runs_shouting_900 <- new_runs_table()

# rmultiple_runs_*: filled from the lm_robust fit of the surrogate outcome
# `violence`.
rmultiple_runs_sign_900 <- new_runs_table()
rmultiple_runs_photo_900 <- new_runs_table()
rmultiple_runs_fire_900 <- new_runs_table()
rmultiple_runs_police_900 <- new_runs_table()
rmultiple_runs_children_900 <- new_runs_table()
rmultiple_runs_group_900 <- new_runs_table()
rmultiple_runs_flag_900 <- new_runs_table()
rmultiple_runs_night_900 <- new_runs_table()
rmultiple_runs_shouting_900 <- new_runs_table()



# Repeated-sampling comparison at the current N (900): for every seed, draw a
# sample with won_sampling() and fit three models with the same covariates:
#   1. dsl() on violence_truth, using `violence` as the prediction,
#   2. lm_robust() on violence_truth,
#   3. lm_robust() on the surrogate outcome `violence`.
# Column i of each results table receives the five statistics from run i.

# Collect (estimate, std. error, CI lower, CI upper, p-value) for coefficient
# row `j` of a data.frame(summary(dsl_fit)) table.
dsl_coef_stats <- function(tab, j) {
  c(
    tab$Estimate[j], tab$Std..Error[j],
    tab$CI.Lower[j], tab$CI.Upper[j], tab$p.value[j]
  )
}

# Same five statistics for coefficient row `j` of a tidy(lm_robust_fit) table.
lm_coef_stats <- function(tab, j) {
  c(
    tab$estimate[j], tab$std.error[j],
    tab$conf.low[j], tab$conf.high[j], tab$p.value[j]
  )
}

# seq_len() rather than 1:k so that k == 0 runs zero iterations instead of
# iterating over c(1, 0).
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # --- DSL estimator ---
  dsl_model_i <- dsl(
    model = "lm",
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  # Rows 2..10 are read positionally, in formula order; row 1 is skipped.
  # NOTE(review): assumes row 1 is the intercept and each covariate contributes
  # exactly one coefficient row -- confirm against the fitted output.
  multiple_runs_sign_900[, i] <- dsl_coef_stats(d, 2)
  multiple_runs_photo_900[, i] <- dsl_coef_stats(d, 3)
  multiple_runs_fire_900[, i] <- dsl_coef_stats(d, 4)
  multiple_runs_police_900[, i] <- dsl_coef_stats(d, 5)
  multiple_runs_children_900[, i] <- dsl_coef_stats(d, 6)
  multiple_runs_group_900[, i] <- dsl_coef_stats(d, 7)
  multiple_runs_flag_900[, i] <- dsl_coef_stats(d, 8)
  multiple_runs_night_900[, i] <- dsl_coef_stats(d, 9)
  multiple_runs_shouting_900[, i] <- dsl_coef_stats(d, 10)
  # Drop the references to the DSL fit before fitting the next model.
  dsl_model_i <- NA
  d <- NA

  # --- lm_robust on violence_truth ---
  sub_model_i <- lm_robust(
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sub_model_i, conf.int = TRUE))
  smultiple_runs_sign_900[, i] <- lm_coef_stats(d, 2)
  smultiple_runs_photo_900[, i] <- lm_coef_stats(d, 3)
  smultiple_runs_fire_900[, i] <- lm_coef_stats(d, 4)
  smultiple_runs_police_900[, i] <- lm_coef_stats(d, 5)
  smultiple_runs_children_900[, i] <- lm_coef_stats(d, 6)
  smultiple_runs_group_900[, i] <- lm_coef_stats(d, 7)
  smultiple_runs_flag_900[, i] <- lm_coef_stats(d, 8)
  smultiple_runs_night_900[, i] <- lm_coef_stats(d, 9)
  smultiple_runs_shouting_900[, i] <- lm_coef_stats(d, 10)

  # --- surrogate model: lm_robust on the predicted outcome `violence` ---
  sur_model_i <- lm_robust(
    formula = violence ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(tidy(sur_model_i, conf.int = TRUE))
  rmultiple_runs_sign_900[, i] <- lm_coef_stats(d, 2)
  rmultiple_runs_photo_900[, i] <- lm_coef_stats(d, 3)
  rmultiple_runs_fire_900[, i] <- lm_coef_stats(d, 4)
  rmultiple_runs_police_900[, i] <- lm_coef_stats(d, 5)
  rmultiple_runs_children_900[, i] <- lm_coef_stats(d, 6)
  rmultiple_runs_group_900[, i] <- lm_coef_stats(d, 7)
  rmultiple_runs_flag_900[, i] <- lm_coef_stats(d, 8)
  rmultiple_runs_night_900[, i] <- lm_coef_stats(d, 9)
  rmultiple_runs_shouting_900[, i] <- lm_coef_stats(d, 10)
  sur_model_i <- NA
  d <- NA

  # Progress indicator: index of the run that just finished.
  print(i)
}


# ---- Simulation with N = 1000 labeled observations ----
# For each of the k seeds, draw a sample with won_sampling() and fit three
# models on it. Results go into one table per estimator and covariate:
#   multiple_runs_*  : DSL estimator (dsl)
#   smultiple_runs_* : lm_robust on the hand-coded outcome (violence_truth)
#   rmultiple_runs_* : lm_robust on the surrogate outcome (violence)
# Every table has one column per run and five rows of statistics.
N <- 1000

run_suffix <- "_1000"
run_stats <- c("estimate", "std.error", "conf.low", "conf.high", "p.value")
# Table-name labels for the nine covariates, in formula order ("group" is the
# label used for the group_20 term). Covariate j is read from coefficient row
# j + 1; row 1 is skipped (presumably the intercept -- confirm for dsl output).
run_covariates <- c(
  "sign", "photo", "fire", "police", "children", "group", "flag", "night",
  "shouting"
)

# Pre-allocate an all-NA 5 x k table for every estimator/covariate pair.
for (run_prefix in c("multiple_runs_", "smultiple_runs_", "rmultiple_runs_")) {
  for (run_cov in run_covariates) {
    assign(
      paste0(run_prefix, run_cov, run_suffix),
      data.frame(matrix(nrow = 5, ncol = k), row.names = run_stats)
    )
  }
}

# seq_len() (rather than 1:k) so that k = 0 runs zero iterations.
for (i in seq_len(k)) {
  df <- won_sampling(won_protest_machine, N, seeds[i])

  # DSL
  dsl_model_i <- dsl(
    model = "lm",
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    predicted_var = "violence_truth",
    prediction = "violence",
    data = df,
    tuning = TRUE
  )
  d <- data.frame(summary(dsl_model_i))
  for (j in seq_along(run_covariates)) {
    run_name <- paste0("multiple_runs_", run_covariates[j], run_suffix)
    run_table <- get(run_name)
    run_table[, i] <- c(
      d$Estimate[j + 1], d$Std..Error[j + 1], d$CI.Lower[j + 1],
      d$CI.Upper[j + 1], d$p.value[j + 1]
    )
    assign(run_name, run_table)
  }
  dsl_model_i <- NA
  d <- NA

  # model fit on the hand-coded outcome only
  sub_model_i <- lm_robust(
    formula = violence_truth ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(sub_model_i %>% tidy(conf.int = TRUE))
  for (j in seq_along(run_covariates)) {
    run_name <- paste0("smultiple_runs_", run_covariates[j], run_suffix)
    run_table <- get(run_name)
    run_table[, i] <- c(
      d$estimate[j + 1], d$std.error[j + 1], d$conf.low[j + 1],
      d$conf.high[j + 1], d$p.value[j + 1]
    )
    assign(run_name, run_table)
  }

  # surrogate model
  sur_model_i <- lm_robust(
    formula = violence ~ sign + photo + fire + police + children +
      group_20 + flag + night + shouting,
    data = df
  )
  d <- as.data.frame(sur_model_i %>% tidy(conf.int = TRUE))
  for (j in seq_along(run_covariates)) {
    run_name <- paste0("rmultiple_runs_", run_covariates[j], run_suffix)
    run_table <- get(run_name)
    run_table[, i] <- c(
      d$estimate[j + 1], d$std.error[j + 1], d$conf.low[j + 1],
      d$conf.high[j + 1], d$p.value[j + 1]
    )
    assign(run_name, run_table)
  }
  sur_model_i <- NA
  d <- NA
  print(i)
}
```

## Let's get the ground truth.

```{r}
# Attach the human violence rating for every protest image.
won_protest_machine$violence_truth <- won_protest_human$violence

# Fit the benchmark on the FULL human-labeled data. The previous version passed
# `data = df`, i.e. whatever sample the last simulation iteration left behind,
# so the "ground truth" was just one more fit on that leftover sample.
# The human annotations are read in as text (read.table + row_to_names), so the
# outcome is coerced to numeric on a copy; won_protest_machine is left as is.
truth_data <- won_protest_machine
truth_data$violence_truth <- as.numeric(as.character(truth_data$violence_truth))
truth_model <- lm_robust(
  formula = violence_truth ~ sign + photo + fire + police + children +
    group_20 + flag + night + shouting,
  data = truth_data
)

# Coefficient rows 2-10 are the nine covariates in formula order
# (row 1 is the intercept).
truth_tidy <- truth_model %>% tidy(conf.int = TRUE)

# Point estimates
t_sign <- truth_tidy$estimate[2]
t_photo <- truth_tidy$estimate[3]
t_fire <- truth_tidy$estimate[4]
t_police <- truth_tidy$estimate[5]
t_children <- truth_tidy$estimate[6]
t_group <- truth_tidy$estimate[7]
t_flag <- truth_tidy$estimate[8]
t_night <- truth_tidy$estimate[9]
t_shouting <- truth_tidy$estimate[10]

# Upper confidence bounds
t_up_sign <- truth_tidy$conf.high[2]
t_up_photo <- truth_tidy$conf.high[3]
t_up_fire <- truth_tidy$conf.high[4]
t_up_police <- truth_tidy$conf.high[5]
t_up_children <- truth_tidy$conf.high[6]
t_up_group <- truth_tidy$conf.high[7]
t_up_flag <- truth_tidy$conf.high[8]
t_up_night <- truth_tidy$conf.high[9]
t_up_shouting <- truth_tidy$conf.high[10]

# Lower confidence bounds
t_l_sign <- truth_tidy$conf.low[2]
t_l_photo <- truth_tidy$conf.low[3]
t_l_fire <- truth_tidy$conf.low[4]
t_l_police <- truth_tidy$conf.low[5]
t_l_children <- truth_tidy$conf.low[6]
t_l_group <- truth_tidy$conf.low[7]
t_l_flag <- truth_tidy$conf.low[8]
t_l_night <- truth_tidy$conf.low[9]
t_l_shouting <- truth_tidy$conf.low[10]
```

## Let us save our dataframes in the right format

```{r}
# Reshape the DSL simulation tables and compare their mean confidence-interval
# widths with the ground-truth interval.

# Labels used in the table names: covariates in plotting order and the
# labeled-sample sizes (kept as strings so they paste into names unchanged).
ci_covariates <- c(
  "sign", "photo", "fire", "police", "children", "group", "flag", "night",
  "shouting"
)
ci_sizes <- c("200", "300", "400", "500", "600", "700", "800", "900", "1000")

# cdsl_<covariate>_<size>: transpose multiple_runs_<covariate>_<size> so every
# run becomes a row, then drop the runs that contain a missing value.
for (ci_n in ci_sizes) {
  for (ci_cov in ci_covariates) {
    ci_runs <- get(paste0("multiple_runs_", ci_cov, "_", ci_n))
    assign(
      paste0("cdsl_", ci_cov, "_", ci_n),
      drop_na(as.data.frame(t(ci_runs)))
    )
  }
}

# Mean of one bound column across runs, for every covariate, ordered from the
# largest sample size down to the smallest.
ci_mean_bound <- function(bound) {
  unlist(lapply(rev(ci_sizes), function(n_lab) {
    vapply(
      ci_covariates,
      function(cv) mean(get(paste0("cdsl_", cv, "_", n_lab))[[bound]]),
      numeric(1),
      USE.NAMES = FALSE
    )
  }))
}

# Row layout: nine ground-truth rows first, then nine rows per sample size
# from 1000 down to 200.
ci_levels <- c("groundtruth", rev(ci_sizes))
confinter <- data.frame(names = rep(ci_covariates, times = length(ci_levels)))

confinter$up <- c(
  t_up_sign, t_up_photo, t_up_fire, t_up_police, t_up_children, t_up_group,
  t_up_flag, t_up_night, t_up_shouting,
  ci_mean_bound("conf.high")
)

confinter$low <- c(
  t_l_sign, t_l_photo, t_l_fire, t_l_police, t_l_children, t_l_group,
  t_l_flag, t_l_night, t_l_shouting,
  ci_mean_bound("conf.low")
)

confinter$type <- rep(ci_levels, each = length(ci_covariates))

confinter$width <- abs(confinter$low - confinter$up)

# dsl<size>ci: the rows for one sample size, plus `div`, the interval width
# relative to the ground-truth width of the same covariate.
oracleci <- confinter[confinter$type == "groundtruth", ]
for (ci_n in ci_sizes) {
  ci_rows <- confinter[confinter$type == ci_n, ]
  ci_rows$div <- ci_rows$width / oracleci$width
  assign(paste0("dsl", ci_n, "ci"), ci_rows)
}

# Stack the per-size tables from 200 up to 1000 and record the number of
# annotations as a numeric column.
plotci <- do.call(
  rbind,
  lapply(ci_sizes, function(n_lab) get(paste0("dsl", n_lab, "ci")))
)
plotci$annotations <- rep(as.numeric(ci_sizes), each = length(ci_covariates))
```

```{r}
#### subsample


sdsl_sign_200 <- as.data.frame(t(smultiple_runs_sign_200))
sdsl_photo_200 <- as.data.frame(t(smultiple_runs_photo_200))
sdsl_fire_200 <- as.data.frame(t(smultiple_runs_fire_200))
sdsl_police_200 <- as.data.frame(t(smultiple_runs_police_200))
sdsl_children_200 <- as.data.frame(t(smultiple_runs_children_200))
sdsl_group_200 <- as.data.frame(t(smultiple_runs_group_200))
sdsl_flag_200 <- as.data.frame(t(smultiple_runs_flag_200))
sdsl_night_200 <- as.data.frame(t(smultiple_runs_night_200))
sdsl_shouting_200 <- as.data.frame(t(smultiple_runs_shouting_200))

sdsl_sign_300 <- as.data.frame(t(smultiple_runs_sign_300))
sdsl_photo_300 <- as.data.frame(t(smultiple_runs_photo_300))
sdsl_fire_300 <- as.data.frame(t(smultiple_runs_fire_300))
sdsl_police_300 <- as.data.frame(t(smultiple_runs_police_300))
sdsl_children_300 <- as.data.frame(t(smultiple_runs_children_300))
sdsl_group_300 <- as.data.frame(t(smultiple_runs_group_300))
sdsl_flag_300 <- as.data.frame(t(smultiple_runs_flag_300))
sdsl_night_300 <- as.data.frame(t(smultiple_runs_night_300))
sdsl_shouting_300 <- as.data.frame(t(smultiple_runs_shouting_300))

sdsl_sign_400 <- as.data.frame(t(smultiple_runs_sign_400))
sdsl_photo_400 <- as.data.frame(t(smultiple_runs_photo_400))
sdsl_fire_400 <- as.data.frame(t(smultiple_runs_fire_400))
sdsl_police_400 <- as.data.frame(t(smultiple_runs_police_400))
sdsl_children_400 <- as.data.frame(t(smultiple_runs_children_400))
sdsl_group_400 <- as.data.frame(t(smultiple_runs_group_400))
sdsl_flag_400 <- as.data.frame(t(smultiple_runs_flag_400))
sdsl_night_400 <- as.data.frame(t(smultiple_runs_night_400))
sdsl_shouting_400 <- as.data.frame(t(smultiple_runs_shouting_400))

sdsl_sign_500 <- as.data.frame(t(smultiple_runs_sign_500))
sdsl_photo_500 <- as.data.frame(t(smultiple_runs_photo_500))
sdsl_fire_500 <- as.data.frame(t(smultiple_runs_fire_500))
sdsl_police_500 <- as.data.frame(t(smultiple_runs_police_500))
sdsl_children_500 <- as.data.frame(t(smultiple_runs_children_500))
sdsl_group_500 <- as.data.frame(t(smultiple_runs_group_500))
sdsl_flag_500 <- as.data.frame(t(smultiple_runs_flag_500))
sdsl_night_500 <- as.data.frame(t(smultiple_runs_night_500))
sdsl_shouting_500 <- as.data.frame(t(smultiple_runs_shouting_500))


sdsl_sign_600 <- as.data.frame(t(smultiple_runs_sign_600))
sdsl_photo_600 <- as.data.frame(t(smultiple_runs_photo_600))
sdsl_fire_600 <- as.data.frame(t(smultiple_runs_fire_600))
sdsl_police_600 <- as.data.frame(t(smultiple_runs_police_600))
sdsl_children_600 <- as.data.frame(t(smultiple_runs_children_600))
sdsl_group_600 <- as.data.frame(t(smultiple_runs_group_600))
sdsl_flag_600 <- as.data.frame(t(smultiple_runs_flag_600))
sdsl_night_600 <- as.data.frame(t(smultiple_runs_night_600))
sdsl_shouting_600 <- as.data.frame(t(smultiple_runs_shouting_600))


sdsl_sign_700 <- as.data.frame(t(smultiple_runs_sign_700))
sdsl_photo_700 <- as.data.frame(t(smultiple_runs_photo_700))
sdsl_fire_700 <- as.data.frame(t(smultiple_runs_fire_700))
sdsl_police_700 <- as.data.frame(t(smultiple_runs_police_700))
sdsl_children_700 <- as.data.frame(t(smultiple_runs_children_700))
sdsl_group_700 <- as.data.frame(t(smultiple_runs_group_700))
sdsl_flag_700 <- as.data.frame(t(smultiple_runs_flag_700))
sdsl_night_700 <- as.data.frame(t(smultiple_runs_night_700))
sdsl_shouting_700 <- as.data.frame(t(smultiple_runs_shouting_700))


sdsl_sign_800 <- as.data.frame(t(smultiple_runs_sign_800))
sdsl_photo_800 <- as.data.frame(t(smultiple_runs_photo_800))
sdsl_fire_800 <- as.data.frame(t(smultiple_runs_fire_800))
sdsl_police_800 <- as.data.frame(t(smultiple_runs_police_800))
sdsl_children_800 <- as.data.frame(t(smultiple_runs_children_800))
sdsl_group_800 <- as.data.frame(t(smultiple_runs_group_800))
sdsl_flag_800 <- as.data.frame(t(smultiple_runs_flag_800))
sdsl_night_800 <- as.data.frame(t(smultiple_runs_night_800))
sdsl_shouting_800 <- as.data.frame(t(smultiple_runs_shouting_800))


sdsl_sign_900 <- as.data.frame(t(smultiple_runs_sign_900))
sdsl_photo_900 <- as.data.frame(t(smultiple_runs_photo_900))
sdsl_fire_900 <- as.data.frame(t(smultiple_runs_fire_900))
sdsl_police_900 <- as.data.frame(t(smultiple_runs_police_900))
sdsl_children_900 <- as.data.frame(t(smultiple_runs_children_900))
sdsl_group_900 <- as.data.frame(t(smultiple_runs_group_900))
sdsl_flag_900 <- as.data.frame(t(smultiple_runs_flag_900))
sdsl_night_900 <- as.data.frame(t(smultiple_runs_night_900))
sdsl_shouting_900 <- as.data.frame(t(smultiple_runs_shouting_900))


sdsl_sign_1000 <- as.data.frame(t(smultiple_runs_sign_1000))
sdsl_photo_1000 <- as.data.frame(t(smultiple_runs_photo_1000))
sdsl_fire_1000 <- as.data.frame(t(smultiple_runs_fire_1000))
sdsl_police_1000 <- as.data.frame(t(smultiple_runs_police_1000))
sdsl_children_1000 <- as.data.frame(t(smultiple_runs_children_1000))
sdsl_group_1000 <- as.data.frame(t(smultiple_runs_group_1000))
sdsl_flag_1000 <- as.data.frame(t(smultiple_runs_flag_1000))
sdsl_night_1000 <- as.data.frame(t(smultiple_runs_night_1000))
sdsl_shouting_1000 <- as.data.frame(t(smultiple_runs_shouting_1000))


# Remove every row that contains an NA from each `sdsl_<covariate>_<n>` table,
# for all nine covariates and every size n from 200 to 1000 in steps of 100.
# The tables are looked up by name and overwritten in place.
sub_run_tables <- as.vector(outer(
  c("sign", "photo", "fire", "police", "children", "group", "flag", "night",
    "shouting"),
  seq(200L, 1000L, by = 100L),
  function(covariate, n_labeled) paste0("sdsl_", covariate, "_", n_labeled)
))

for (table_name in sub_run_tables) {
  assign(table_name, drop_na(get(table_name)))
}


# Assemble `sconfinter`, one row per covariate and source:
#   * rows 1-9: the ground-truth bounds (`t_up_*` / `t_l_*`);
#   * then nine rows for each size 1000, 900, ..., 200, whose bounds are the
#     mean `conf.high` / `conf.low` of the matching `sdsl_<covariate>_<n>` table.
# Columns: names, up, low, type (source label as a string) and width (absolute
# distance between the two bounds).
sconfinter <- local({
  covariates <- c("sign", "photo", "fire", "police", "children", "group",
                  "flag", "night", "shouting")
  sizes <- seq(1000L, 200L, by = -100L)

  # Mean of one column (chosen by `pick`) for every size/covariate pair, with
  # the covariate varying fastest within each size.
  mean_bound <- function(pick) {
    unlist(lapply(sizes, function(n_labeled) {
      vapply(covariates, function(covariate) {
        mean(pick(get(paste0("sdsl_", covariate, "_", n_labeled))))
      }, numeric(1), USE.NAMES = FALSE)
    }))
  }

  # Columns are added one at a time with `$<-` so that each keeps the type of
  # the vector assigned to it.
  ci <- data.frame(names = rep(covariates, times = length(sizes) + 1))
  ci$up <- c(
    t_up_sign, t_up_photo, t_up_fire, t_up_police, t_up_children, t_up_group,
    t_up_flag, t_up_night, t_up_shouting,
    mean_bound(function(tbl) tbl$conf.high)
  )
  ci$low <- c(
    t_l_sign, t_l_photo, t_l_fire, t_l_police, t_l_children, t_l_group,
    t_l_flag, t_l_night, t_l_shouting,
    mean_bound(function(tbl) tbl$conf.low)
  )
  ci$type <- c(
    rep("groundtruth", length(covariates)),
    rep(as.character(sizes), each = length(covariates))
  )
  ci$width <- abs(ci$low - ci$up)
  ci
})

# Rows of `sconfinter` for one `type` label, plus `div`: the width taken from
# `dsl_ci` divided by the width of these rows.
# NOTE(review): the division is positional, so `dsl_ci` must list the
# covariates in the same row order as `sconfinter` - confirm where the
# `dsl<n>ci` tables are built.
sub_ci_with_ratio <- function(type_label, dsl_ci) {
  sub_ci <- sconfinter[sconfinter$type == type_label, ]
  sub_ci$div <- dsl_ci$width / sub_ci$width
  sub_ci
}

soracleci <- sconfinter[sconfinter$type == "groundtruth", ]
sdsl200ci <- sub_ci_with_ratio("200", dsl200ci)
sdsl300ci <- sub_ci_with_ratio("300", dsl300ci)
sdsl400ci <- sub_ci_with_ratio("400", dsl400ci)
sdsl500ci <- sub_ci_with_ratio("500", dsl500ci)
sdsl600ci <- sub_ci_with_ratio("600", dsl600ci)
sdsl700ci <- sub_ci_with_ratio("700", dsl700ci)
sdsl800ci <- sub_ci_with_ratio("800", dsl800ci)
sdsl900ci <- sub_ci_with_ratio("900", dsl900ci)
sdsl1000ci <- sub_ci_with_ratio("1000", dsl1000ci)

# Stack the per-size tables from 200 up to 1000 and record the size as a
# number: nine rows (one per covariate) for each size.
splotci_sub <- rbind(
  sdsl200ci, sdsl300ci, sdsl400ci, sdsl500ci, sdsl600ci,
  sdsl700ci, sdsl800ci, sdsl900ci, sdsl1000ci
)
splotci_sub$annotations <- rep(seq(200, 1000, by = 100), each = 9)
```

## Figure 7

```{r}
# Figure 7, all covariates: `div` (DSL CI width divided by sub-sample CI
# width) against the number of annotations, one line per covariate.
ggplot(
  splotci_sub,
  aes(y = div, x = annotations, color = names, group = names, shape = names)
) +
  geom_point() +
  geom_line() +
  # The default shape palette only covers six levels; with nine covariates the
  # last three would be drawn without point markers. Supply nine shapes
  # explicitly (the first six are the ggplot2 defaults).
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 0, 1, 2)) +
  labs(
    title = "Ratio between DSL CI and sub sample annotations CI by number of annotations",
    y = "Width of DSL CI/Width of Sub-sample CI",
    x = "Number of annotations"
  ) +
  # theme() tweaks have to come after theme_bw(): a complete theme replaces any
  # theme() settings added before it.
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),   # title font size
    axis.title = element_text(size = 14),   # axis title font size
    axis.text = element_text(size = 14),    # axis tick label font size
    strip.text = element_text(size = 14),   # facet label font size
    legend.text = element_text(size = 14),  # legend entry font size
    legend.title = element_text(size = 14)  # legend title font size
  )
```

```{r}
# `div` from `plotci` (labelled as DSL CI width over oracle CI width) against
# the number of annotations, one line per entry of `names`.
ggplot(
  plotci,
  aes(y = div, x = annotations, color = names, group = names, shape = names)
) +
  geom_point() +
  geom_line() +
  # The default shape palette only covers six levels, so any further level
  # would be drawn without point markers. Supply nine shapes explicitly (the
  # first six are the ggplot2 defaults).
  # NOTE(review): assumes `plotci$names` has at most nine levels, like
  # `splotci_sub` - confirm where `plotci` is built.
  scale_shape_manual(values = c(16, 17, 15, 3, 7, 8, 0, 1, 2)) +
  labs(
    title = "Width of DSL CI/Oracle CI",
    y = "Width of DSL CI/Width of Oracle CI",
    x = "Number of annotations"
  ) +
  # theme() tweaks have to come after theme_bw(): a complete theme replaces any
  # theme() settings added before it.
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),   # title font size
    axis.title = element_text(size = 14),   # axis title font size
    axis.text = element_text(size = 14),    # axis tick label font size
    strip.text = element_text(size = 14),   # facet label font size
    legend.text = element_text(size = 14),  # legend entry font size
    legend.title = element_text(size = 14)  # legend title font size
  )
```

```{r}
# Same ratio plot restricted to the group, night and photo covariates, with
# the default legend kept.
ggplot(
  filter(splotci_sub, names %in% c("group", "night", "photo")),
  aes(y = div, x = annotations, color = names, group = names, shape = names)
) +
  geom_point() +
  geom_line() +
  labs(
    title = "Ratio between DSL CI and sub-sample CI by number of annotations",
    y = "Width of DSL CI/sub-sample CI",
    x = "Number of annotations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),   # title font size
    axis.title = element_text(size = 14),   # axis title font size
    axis.text = element_text(size = 14),    # axis tick label font size
    strip.text = element_text(size = 14),   # facet label font size
    legend.text = element_text(size = 14),  # legend entry font size
    legend.title = element_text(size = 14)  # legend title font size
  )
```

```{r}
# Ratio plot for group / night / photo with the legend replaced by coloured
# text labels and an arrow pointing towards smaller ratios. Labels are anchored
# at the largest annotation count and at each covariate's maximum `div`, then
# shifted with `hjust`.
splotci_sub %>%
  filter(names %in% c("group", "night", "photo")) %>%
  ggplot(aes(y = div, x = annotations, color = names, group = names, shape = names)) +
  geom_point() +
  geom_line() +
  # The text labels below are the only key once the legend is removed, so the
  # lines must use exactly the label colours rather than the default palette.
  scale_color_manual(
    values = c(group = "tomato", night = "forestgreen", photo = "royalblue")
  ) +
  labs(
    title = "Ratio between DSL CI and sub-sample CI by number of annotations",
    y = "Width of DSL CI/sub-sample CI",
    x = "Number of annotations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    strip.text = element_text(size = 14)
  ) +
  guides(color = "none", shape = "none") +  # Remove legend
  annotate("text",
           x = max(splotci_sub$annotations),
           y = splotci_sub %>% filter(names == "group") %>% pull(div) %>% max(),
           label = "group", color = "tomato", hjust = 2.5, fontface = "bold") +
  annotate("text",
           x = max(splotci_sub$annotations),
           y = splotci_sub %>% filter(names == "night") %>% pull(div) %>% max(),
           label = "night", color = "forestgreen", hjust = -0.2, fontface = "bold") +
  annotate("text",
           x = max(splotci_sub$annotations),
           y = splotci_sub %>% filter(names == "photo") %>% pull(div) %>% max(),
           label = "photo", color = "royalblue", hjust = 2, fontface = "bold") +
  # Arrow annotation for "Bigger gains from DSL". Its position comes from
  # quantiles of the full `splotci_sub` table (all covariates), not only the
  # three that are plotted.
  annotate("segment",
           x = quantile(splotci_sub$annotations, 0.75),
           y = quantile(splotci_sub$div, 0.25),
           xend = quantile(splotci_sub$annotations, 0.75),
           yend = quantile(splotci_sub$div, 0.15),
           arrow = arrow(length = unit(0.2, "cm")),
           color = "black") +
  annotate("text",
           x = quantile(splotci_sub$annotations, 0.75) + 5,
           y = quantile(splotci_sub$div, 0.2),
           label = "Bigger gains from DSL",
           color = "black", fontface = "bold", hjust = 0)

```

```{r}
# Figure 7 (group / night / photo): ratio of DSL CI width to sub-sample CI
# width by number of annotations. The legend is replaced by a hand-placed
# colour key at x = 300 and by an arrow marking the direction of larger gains.
splotci_sub %>%
  filter(names %in% c("group", "night", "photo")) %>%
  ggplot(aes(y = div, x = annotations, color = names, group = names, shape = names)) +
  geom_point() +
  geom_line() +
  # The text key below is the only legend, so the lines must use exactly the
  # key's colours rather than the default palette.
  scale_color_manual(
    values = c(group = "tomato", night = "forestgreen", photo = "royalblue")
  ) +
  labs(
    title = "Ratio between DSL CI and sub-sample CI by number of annotations",
    y = "Width of DSL CI/sub-sample CI",
    x = "Number of annotations"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14),
    strip.text = element_text(size = 14)
  ) +
  guides(color = "none", shape = "none") +  # Remove legend
  # Colour key: fixed positions in data coordinates.
  annotate("text",
           x = 300,
           y = 0.88,
           label = "group", color = "tomato", hjust = 0, fontface = "bold", size = 5) +
  annotate("text",
           x = 300,
           y = 0.83,
           label = "night", color = "forestgreen", hjust = 0, fontface = "bold", size = 5) +
  annotate("text",
           x = 300,
           y = 0.78,
           label = "photo", color = "royalblue", hjust = 0, fontface = "bold", size = 5) +
  # Arrow annotation for "Bigger gains from DSL". The y positions come from
  # quantiles of the full `splotci_sub` table (all covariates), not only the
  # three that are plotted.
  annotate("segment",
           x = 790,
           y = quantile(splotci_sub$div, 0.15),
           xend = 790,
           yend = quantile(splotci_sub$div, 0.05),
           arrow = arrow(length = unit(0.3, "cm")),
           color = "black") +
  annotate("text",
           x = quantile(splotci_sub$annotations, 0.6),
           y = quantile(splotci_sub$div, 0.2),
           label = "Bigger gains from DSL",
           color = "black", fontface = "bold", hjust = 0, size = 4.5)
```

## Figure 8

```{r}
# Build the input for the power analysis with `won_sampling()` at N = 300.
# R has no octal literals, so the seed 08544 is read as the number 8544.
prediction_df <- won_sampling(won_protest_machine, N = 300, seed = 08544)

# DSL power analysis for labelled-sample sizes 400, 500, ..., 1000, using the
# same linear model of `violence_truth` on the nine covariates; `violence` is
# passed as the prediction of `violence_truth`.
powerdsl <- power_dsl(
  labeled_size = seq(400, 1000, by = 100),
  model = "lm",
  formula = violence_truth ~ sign + photo + fire + police + children +
    group_20 + flag + night + shouting,
  predicted_var = "violence_truth",
  prediction = "violence",
  data = prediction_df,
  tuning = TRUE
)
```

```{r}
# Tabulate the power analysis as a data frame.
prediction <- as.data.frame(summary(powerdsl))

# Map the dummy-coded coefficient names onto the plain covariate names used in
# the rest of the analysis; columns with any other name are left untouched.
coef_labels <- c(
  sign1 = "sign",
  photo1 = "photo",
  fire1 = "fire",
  police1 = "police",
  children1 = "children",
  group_201 = "group",
  flag1 = "flag",
  night1 = "night",
  shouting1 = "shouting"
)
is_dummy <- names(prediction) %in% names(coef_labels)
names(prediction)[is_dummy] <- unname(coef_labels[names(prediction)[is_dummy]])

# Keep only the three covariates shown in Figure 8.
prediction <- prediction[, colnames(prediction) %in% c("group", "night", "photo")]
# Drop the first row so that seven rows remain, one per requested size.
# NOTE(review): presumably the first row is the current labelled size - confirm
# against the `summary()` output of `power_dsl`.
prediction <- prediction[-1, ]
head(prediction)
```

```{r}

# Sizes compared in Figure 8.
labeled_sizes <- seq(400, 1000, by = 100)

# Mean `std.error` of one covariate at each size, read from the
# `cdsl_<covariate>_<n>` tables.
mean_std_error <- function(covariate) {
  vapply(labeled_sizes, function(n_labeled) {
    mean(get(paste0("cdsl_", covariate, "_", n_labeled))$std.error)
  }, numeric(1))
}

real <- data.frame(
  "photo" = mean_std_error("photo"),
  "group" = mean_std_error("group"),
  "night" = mean_std_error("night"),
  row.names = labeled_sizes
)

# Stack the observed ("real") rows on top of the power-analysis rows and tag
# each row with its size and source.
together <- rbind(real, prediction)
together$number <- rep(labeled_sizes, times = 2)
together$type <- rep(c("real", "prediction"), each = length(labeled_sizes))

# Reshape to long format: one row per (number, type, covariate) with the
# standard error in `value`. `setDT()` converts `together` to a data.table by
# reference, so `together` itself is a data.table afterwards.
library(data.table)
setDT(together)
melted <- as.data.frame(
  melt(together, id.vars = c("number", "type"), variable.name = "variable")
)


```

```{r}
# Figure 8 with a legend: predicted versus observed standard errors by number
# of annotations, one facet per covariate.
ggplot(
  filter(melted, variable != "(Intercept)"),
  aes(x = number, y = value)
) +
  geom_line(aes(color = type, linetype = type)) +
  facet_wrap(vars(variable)) +
  # `type` takes the values "prediction" and "real"; name the manual values so
  # the mapping does not depend on level order.
  scale_color_manual(values = c(prediction = "darkred", real = "steelblue")) +
  scale_linetype_manual(values = c(prediction = "dashed", real = "solid")) +
  labs(
    title = "Power analysis compared to true standard errors (Won et al, 2017)",
    x = "Number of expert annotations",
    y = "Standard errors"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),   # title font size
    axis.title = element_text(size = 14),   # axis title font size
    axis.text = element_text(size = 12),    # axis tick label font size
    strip.text = element_text(size = 14),   # facet label font size
    legend.text = element_text(size = 14),  # legend entry font size
    legend.title = element_text(size = 14)  # legend title font size
  )
```

```{r}
# Figure 8 without a legend: the two lines are named by text labels drawn in
# the "photo" facet only, next to each line's point at the largest number of
# annotations (nudged 3.5% upwards).
ggplot(
  filter(melted, variable != "(Intercept)"),
  aes(x = number, y = value)
) +
  geom_line(aes(color = type, linetype = type)) +
  facet_wrap(vars(variable)) +
  # The label layer's data contains only `variable == "photo"`, so it is drawn
  # in that facet alone; one row per `type`, at the maximum `number`.
  geom_text(
    data = melted %>%
      filter(variable == "photo") %>%
      group_by(type) %>%
      slice_max(number),
    aes(x = number, y = value*1.035, label = type, color = type),
    fontface = "bold",
    hjust = 1.15
  ) +
  scale_color_manual(values = c("prediction" = "darkred", "real" = "steelblue")) +
  scale_linetype_manual(values = c("prediction" = "dashed", "real" = "solid")) +
  guides(color = "none", linetype = "none") +  # Remove legend
  labs(
    title = "Power analysis compared to true standard errors (Won et al, 2017)",
    x = "Number of expert annotations",
    y = "Standard errors"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 14)
  )


```
